*06long_run.do
* Sets up path globals, opens the log, then (below) builds the linked
* 1920-1940 analysis sample and runs the long-run regressions.
capture clear

* External data locations
global indir1 "/disk/bulkw/mrashid/matching_project/master_crosswalks"
global censdata "/homes/data/census-ipums/v2019/dta/"
global crosswalks "/disk/bulkw/karger/census_bulk/citylonglat"

* Project folders: each global is the lower-cased folder name under $root
global root "/disk/bulkw/nencka/schooling_pandemic/2021_10_18_final/"
foreach folder in Input Scripts Temp Output Log Figures {
	local gname = lower("`folder'")
	global `gname' "$root/`folder'"
}

* Start a fresh text log (any log left open by a previous run is closed first)
capture log close
log using "$log/06_results_longrun", replace text

* The perm option makes plotplain the default scheme for future sessions too
set scheme plotplain, perm


*****************************************************************************
*****************************************************************************
*****************************************************************************

*Load schooling laws from Claudia Goldin
* Writes a state-level file with one variable besides the state code:
* labage from the year-1920 rows, renamed age_work_permit.

	* Read only the needed columns and only the 1920 rows
	use stfip labage year if year == 1920 using "$input/cg_cs_laws.dta", clear
	drop year

	rename stfip  statefip
	rename labage age_work_permit

	sort statefip
	save "$temp/cg_cs_laws_1920.dta", replace




*var lists
* vars_1920 and vars_1940: columns pulled from each census file for the linked person
* father_vars_1920 and mother_vars_1920: columns pulled from the 1920 file for each parent

global vars_1920 "race namefrst namelast statefip sea metro city stdcity enumdist county urban farm ownershp mortgage famsize citizen bpl fbpl fbplstr mbpl mbplstr nativity yrimmig yrsusa1 yrsusa2  lit school speakeng  pageno nchild nchlt5 relate mtongue serial pernum poploc momloc"
global vars_1940 "mtongue gq gqtype age marst bpl empstat classwkr incnonwg wkswork1 wkswork2 hrswork1 hrswork2 relate incwage urban higrade educ enumdist statefip countyicp metro metarea metdist farm ownershp ward rent sea city labforce occ1950 ind1950 occscore nchild nchlt5 migcounty migsea5 migplac5 mcit5str migcity5 nativity citizen"

* Both parents use the same list; the mother list is defined from the father list
global father_vars_1920 "bpl citizen nativity yrimmig yrsusa1 yrsusa2 speakeng marst labforce occ1950 occstr occscore ind1950 mtongue"
global mother_vars_1920 "$father_vars_1920"


*************
* 1920-1940 *
*************
* Builds one row per linked person: crosswalk rows flagged
* link_abe_nysiis_standard == 1, with 1920 variables (suffix _20),
* 1940 variables (suffix _40) and each parent's 1920 variables
* (suffixes _pop and _mom) attached.
cd $indir1
use crosswalk_1920_1940
describe, fullnames
tabulate link_abe_nysiis_standard, missing
keep if link_abe_nysiis_standard == 1

* Attach each census year in turn. The year's id is temporarily renamed to
* histid so it matches the key in the census file, and renamed back afterwards.
foreach yr in 1920 1940 {

	* two-digit suffix added to every variable pulled from this year
	local sfx = substr("`yr'", 3, 2)

	* the 1920 file is merged 1:m, the 1940 file 1:1
	if `yr' == 1920 local mtype "1:m"
	else            local mtype "1:1"

	rename histid_`yr' histid
	duplicates drop histid, force
	merge `mtype' histid using $censdata//`yr'.dta, keepusing(${vars_`yr'}) keep(1 3) nogen

	* keep a single row per id after the merge
	duplicates drop histid, force

	foreach v of global vars_`yr' {
		rename `v' `v'_`sfx'
	}

	rename histid histid_`yr'
}

* Parents' 1920 characteristics: look each parent up in the 1920 file by
* household serial and the parent's person number (poploc_20 or momloc_20).
rename serial_20 serial

foreach parent in pop mom {

	if "`parent'" == "pop" local pvars $father_vars_1920
	else                   local pvars $mother_vars_1920

	* the location pointer stands in for pernum during the merge
	rename `parent'loc_20 pernum

	merge m:1 serial pernum using $censdata//1920.dta, keepusing(`pvars') keep(1 3) nogen

	foreach v of local pvars {
		rename `v' `v'_`parent'
	}

	rename pernum `parent'loc_20
}

describe, fullnames
save $temp//censuslinkingproject_20_40.dta, replace

*/

*****************************************************************************
*****************************************************************************
*****************************************************************************


* Reload the linked file and attach town-level school-closure data,
* keyed on 1920 state and lower-cased 1920 city name (mcd).
use $temp//censuslinkingproject_20_40.dta, clear

rename (statefip_20 stdcity_20) (statefip mcd)
replace mcd = lower(mcd)

gsort statefip mcd
merge m:1 statefip mcd using "$temp/school_closures_towns_1920.dta"

* Inspect the match, list towns present only in the closure file,
* then keep matched people only
tabulate _merge, missing
tabulate mcd statefip if _merge == 2
keep if _merge == 3
drop _merge


*Merge on mortality data
* Attaches flu_mortality.dta by state and town, builds a standardized and
* winsorized excess-death measure, a town id, and 1920 age bins.

	sort statefip mcd
	merge m:1 statefip mcd using "$temp/flu_mortality.dta"
	tabulate _merge

	* towns found only in the mortality file are listed, then dropped
	tabulate mcd if _merge == 2
	keep if _merge != 2
	drop _merge

	* z-score over the rows in memory, then clamp to the 1st and 99th percentiles
	egen std_excess_death = std(excess_death_ratio)
	summarize std_excess_death, detail
	replace std_excess_death = min(max(std_excess_death, r(p1)), r(p99)) if !missing(std_excess_death)

	* numeric id for each state-town pair
	egen mcd_c = group(statefip mcd)

	* age in 1920 implied by the age recorded in 1940
	gen age_at_1920 = age_40 - 20

	* age bins: 1 = 0-5, 2 = 6-10, 3 = 11-14, 4 = 15-18, 5 = 19-21, 6 = 22-25
	capture drop age_bin
	gen age_bin = .
	local lo_bounds 0 6 11 15 19 22
	local hi_bounds 5 10 14 18 21 25
	forvalues b = 1/6 {
		local lo : word `b' of `lo_bounds'
		local hi : word `b' of `hi_bounds'
		replace age_bin = `b' if inrange(age_at_1920, `lo', `hi')
	}
	tabulate age_bin, missing

	* people outside ages 0-25 in 1920 leave the sample
	keep if !missing(age_bin)


*****************************************************************************
*****************************************************************************
*****************************************************************************


*Generate heterogeneity variables
* Indicators used to split the sample in the regressions further down.

	*Top occupation dad: 1 when father's occscore is above 25, 0 otherwise,
	*missing when the father's score is missing
	gen top_dad_occupation = occscore_pop > 25 if ~mi(occscore_pop)

	*Assign birthplace (via https://usa.ipums.org/usa-action/variables/BPL#codes_section)
	*1 when fbpl_20 lies strictly between 13000 and 90000, 0 for any other
	*non-missing code. NOTE(review): presumably these bounds bracket the
	*foreign-country codes of the detailed BPL scheme - confirm against the link.
	gen dad_foreign = (fbpl_20 > 13000 & fbpl_20 < 90000) if ~mi(fbpl_20)

	tab top_dad_occupation dad_foreign, mi

	*Generate race variable
	*Uses race_20 by name: no variable called race exists yet at this point
	*(it is only created later in the script), so a bare "race" would resolve
	*only through variable-name abbreviation.
	*Rows with missing race_20 are coded 0, as before.
	gen black = inlist(race_20, 200, 210)



*Create Census region indicators
* region takes values 1-4 according to the state FIPS lists below;
* any state not listed stays missing.

	local region1 9, 23, 25, 33, 44, 50, 34, 36, 42
	local region2 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46
	local region3 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48
	local region4 4, 8, 16, 30, 32, 35, 49, 56, 2, 6, 15, 41, 53

	gen region = .
	forvalues r = 1/4 {
		replace region = `r' if inlist(statefip, `region`r'')
	}
	tabulate region, missing

			
*Clean up education variable
* Builds in_school (1920) and educ_years (1940), copies race and birth year
* into short names, then attaches city-level covariates.

	* 1 when school_20 is 2, 0 when it is 1, missing for any other code
	gen in_school = school_20 == 2 if inlist(school_20, 1, 2)

	* higrade_40 code 10 maps to 0 years; codes from 30 up to (not including)
	* 999 map to code/10 - 3; everything else (missing, 0, 999, codes
	* between 10 and 30) stays missing
	gen educ_years = .
	replace educ_years = 0 if higrade_40 == 10
	replace educ_years = higrade_40/10 - 3 if higrade_40 >= 30 & higrade_40 < 999

	tab educ_years in_school, m

	gen race = race_20
	gen birthyr = 1940 - age_40

	* City covariates. The covariate file carries its own days_closed, so the
	* copy already in memory is parked under another name during the merge and
	* restored afterwards.
	* keep(master match) stops towns that appear only in the covariate file
	* from being appended as empty rows to this person-level sample.
	capture drop _merge
	sort statefip mcd
	rename days_closed days_closed_pre_m
	merge m:1 statefip mcd using "$temp/city_covariates.dta", keep(master match) nogenerate
	drop days_closed
	rename days_closed_pre_m days_closed



*Clean non-education outcomes, including wage income, non-wage income, and annual hours worked

	* Wage income: values above 900000 become missing; values below 1, and
	* missing values for people with gq_40 equal to 3 or 4, are set to 1 so
	* the log below is defined
	summarize incwage_40, detail
	replace incwage_40 = . if incwage_40 > 900000
	replace incwage_40 = 1 if incwage_40 < 1 | (missing(incwage_40) & inlist(gq_40, 3, 4))

	summarize incwage_40, detail
	bysort gq_40: summarize incwage_40, detail

	* flag for wage income of at least 50, taken before the log transform
	gen ind_gte50_incwage = incwage_40 >= 50 & !missing(incwage_40)

	* keep the level, then overwrite incwage_40 with its log
	gen incwage_40_orig = incwage_40
	replace incwage_40 = log(incwage_40) if !missing(incwage_40)


	* Non-wage income flag: code 9 to missing, 1 to 0, 2 to 1; missing values
	* for people with gq_40 equal to 3 or 4 are set to 0
	tabulate incnonwg_40, missing
	recode incnonwg_40 (9 = .) (1 = 0) (2 = 1)
	replace incnonwg_40 = 0 if missing(incnonwg_40) & inlist(gq_40, 3, 4)
	tabulate incnonwg_40, missing

	* Annual hours: weeks times hours, floored at 1, then logged
	gen annual_hours = wkswork1_40*hrswork1_40
	summarize annual_hours, detail
	replace annual_hours = log(cond(annual_hours < 1, 1, annual_hours))

	* Attainment indicators from educ_years (missing when educ_years is missing)
	gen completed_gs = educ_years >= 8  if ~mi(educ_years)
	gen completed_hs = educ_years >= 12 if ~mi(educ_years)
	gen any_college  = educ_years > 12  if ~mi(educ_years)
	gen completed_ba = educ_years >= 16 if ~mi(educ_years)

	foreach var of varlist completed_gs completed_hs any_college completed_ba {
		tabulate educ_years `var'
	}


* Any-closure indicator. Left missing when days_closed is missing: a bare
* "days_closed != 0" is true for missing values and would code them as closed.
gen closed = days_closed != 0 if !mi(days_closed)

* Attach the 1920 minimum age for a work permit by state; states that appear
* only in the laws file are dropped
sort statefip
merge m:1 statefip using "$temp/cg_cs_laws_1920.dta"
drop if _merge == 2
drop _merge

* can_work: 1 when the person's 1920 age is at or above the state's permit age
* or the permit age is 0, and 0 when the age is below the permit age.
* It is missing when either input is missing. The parentheses matter: & binds
* tighter than |, and a missing age compares as larger than any permit age.
* NOTE(review): an earlier comment here read "Impute DC", but nothing in this
* script imputes a value for DC - confirm whether the laws file covers it.
gen can_work = (age_at_1920 >= age_work_permit | age_work_permit == 0) if ~mi(age_at_1920) & ~mi(age_work_permit)

tab age_at_1920 can_work, mi

tab race, m
tab mcd_c, m

* Treatment intensity: closure length in units of 21 days
gen weeks_closed_3wks = days_closed/21
su weeks_closed_3wks days_closed, de

* Inverse hyperbolic sine of days closed
gen ihs_days_closed = log(days_closed + sqrt(days_closed^2 + 1))

save $temp//longrun_analysis_sample.dta, replace
*/
* Regressions start from the saved analysis sample
use $temp//longrun_analysis_sample.dta, clear

*****************************************************************************
*****************************************************************************
*****************************************************************************


*completed_gs completed_hs any_college completed_ba

* Terms passed to absorb() in every regression below:
*   ind_covs     - race by birth year, and race by town
*   city_by_covs - each city-level variable interacted with birth year
local ind_covs "race##i.birthyr race##mcd_c"

local city_by_covs
foreach cov in in_school_6_10_avg in_school_11_14_avg in_school_15_18_avg occscore_base_avg foreignb_avg count {
	local city_by_covs `city_by_covs' c.`cov'##i.birthyr
}

* Pieces shared by the specifications below
*   treat    - age bin (base bin 1) interacted with closure length
*   fe_reg   - region-by-birth-year effects plus the individual and city terms
*   fe_state - state-by-birth-year effects plus the same terms
*   pm_opts  - parmest formatting, 95 percent intervals
local treat    "ib1.age_bin##c.weeks_closed_3wks"
local fe_reg   "region##birthyr `ind_covs' `city_by_covs'"
local fe_state "statefip##birthyr `ind_covs' `city_by_covs'"
local pm_opts  "format(estimate min95 max95) level(95)"

* Heterogeneity splits: file tag and matching sample condition, in run order
local het_tags  "focchigh focclow fbornfor fbornus black nonblack"
local het_conds "top_dad_occupation==1 top_dad_occupation==0 dad_foreign==1 dad_foreign==0 black==1 black==0"

* Every specification saves its estimates to $temp as longrun_<spec>_<outcome>
foreach var of varlist educ_years incwage_40 incnonwg_40 annual_hours {

	display "`var'"

	* Baseline: region-by-birth-year effects, errors clustered by town
	reghdfe `var' `treat', absorb(`fe_reg') cluster(mcd_c)
	parmest, `pm_opts' saving("$temp/longrun_regionbyr_`var'", replace)

	* State-by-birth-year effects
	reghdfe `var' `treat', absorb(`fe_state') cluster(mcd_c)
	parmest, `pm_opts' saving("$temp/longrun_statebyr_`var'", replace)

	* Baseline with errors clustered by state
	reghdfe `var' `treat', absorb(`fe_reg') cluster(statefip)
	parmest, `pm_opts' saving("$temp/longrun_statec_`var'", replace)

	* Specification check: towns with a positive closure length only
	reghdfe `var' `treat' if days_closed > 0, absorb(`fe_reg') cluster(mcd_c)
	parmest, `pm_opts' saving("$temp/longrun_nozero_`var'", replace)

	* Specification check: IHS of days closed as the treatment
	reghdfe `var' ib1.age_bin##c.ihs_days_closed, absorb(`fe_reg') cluster(mcd_c)
	parmest, `pm_opts' saving("$temp/longrun_ihs_`var'", replace)

	* Baseline plus age bin interacted with standardized excess deaths
	reghdfe `var' `treat' ib1.age_bin##c.std_excess_death, absorb(`fe_reg') cluster(mcd_c)
	parmest, `pm_opts' saving("$temp/longrun_regionbyr_mort_`var'", replace)

	* Baseline plus the can_work control
	reghdfe `var' `treat' can_work, absorb(`fe_reg') cluster(mcd_c)
	parmest, `pm_opts' saving("$temp/longrun_laborl_`var'", replace)

	* Baseline on people with can_work equal to 1 or aged under 14 in 1920
	tabulate age_at_1920 can_work
	reghdfe `var' `treat' if (can_work == 1 | age_at_1920 < 14), absorb(`fe_reg') cluster(mcd_c)
	parmest, `pm_opts' saving("$temp/longrun_earlyw_`var'", replace)

	* Heterogeneity: baseline re-run on each subsample listed in het_conds
	forvalues s = 1/6 {
		local tag  : word `s' of `het_tags'
		local cond : word `s' of `het_conds'

		reghdfe `var' `treat' if `cond', absorb(`fe_reg') cluster(mcd_c)
		parmest, `pm_opts' saving("$temp/longrun_heterogeneous_`tag'_`var'", replace)
	}

}


*Additional education variables
* Baseline specification only (region-by-birth-year effects, town clusters)
* for the four attainment indicators; estimates are saved to $temp.
local attain_outcomes completed_gs completed_hs any_college completed_ba

foreach var of varlist `attain_outcomes' {

	display "`var'"

	reghdfe `var' ib1.age_bin##c.weeks_closed_3wks, ///
		absorb(region##birthyr `ind_covs' `city_by_covs') cluster(mcd_c)
	parmest, format(estimate min95 max95) level(95) ///
		saving("$temp/longrun_regionbyr_`var'", replace)

}

* Drop the data from memory and close the log opened at the top of the script
clear
log close
